#installation if necessary
#if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#if (!requireNamespace("devtools", quietly = TRUE)) {
# install.packages("devtools")
#}
#BiocManager::install("DESeq2")
#BiocManager::install("tximeta")
#install.packages("tidyselect")
#loading in the data
library(tidyverse)
library(datapasta) # allows for copy and pasting
library(DESeq2)
library(SummarizedExperiment)
library(tximeta)
library(dplyr)
#filtering and normalization
#BiocManager::install("vsn")
#library(matrixStats)
#library(hexbin)
#library("vsn")
#multivariate analysis (edgeR)
library(edgeR)
if (!require("DT")) install.packages("DT"); library(DT) # for making interactive tables
if (!require("plotly")) install.packages("plotly"); library(plotly) # for making interactive plots
if (!require("gt")) install.packages("gt"); library(gt) # A layered 'grammar of tables' - think ggplot, but for tables
if (!require("stargazer")) install.packages("stargazer"); library(stargazer)
if (!require("rgl")) install.packages("rgl"); library(rgl)
if (!require("dendextend")) install.packages("dendextend"); library(dendextend) # A way to customize dendrograms to make more complicated visualizations
if (!require("ggpattern")) install.packages("ggpattern"); library(ggpattern)
if (!require("data.table")) install.packages("data.table"); library(data.table)
#making pretty figs
#devtools::install_github("thomasp85/patchwork")
library(patchwork)
library(ggplot2)
library(cowplot)
#Check for necessary packages - example cod for making this prettier
#list.of.packages <- c("ggplot2",
# "tidyr",
# "dplyr",
# "Hmisc",
# "qqplotr",
# "ggthemes",
# "fabricatr",
# "gridExtra",
# "grid",
# "kableExtra",
# "sjPlot",
# "cowplot",
# "ggpubr",
# "patchwork",
# "magick", #so you can use the savekable function
# "webshot", #so you can use the savekable function
# "lme4",
# "RcppEigen")
#should install packages that you don't have
#new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
#if(length(new.packages)) install.packages(new.packages)
#Load the package that contains the Cox Propotional Hazard Function
#library(")
rm(list = ls())
#read in sample metadata
meta <- read.delim("study_design.txt", header = TRUE, as.is = TRUE)
#make the SAMPLE column the rownames
rownames(meta) <- meta$SAMPLE
#add a names column (for lmerSeq later)
meta$names <- meta$SAMPLE
#capture sample labels from the design study file
sampleLabels <- meta$SAMPLE
vst_expr <- read_tsv("./variance_stabilized_counts_2025_03_12.tsv")
#get annotations for transcripts by loading file from ncbi ---- EDITED TO MATCH ORs ADDED FROM Mitchell's ANNOTATION----
annotation_file <- ("Ppyr_annotations_ORmodified_2025_03_12.tsv")
#Tx <- read_tsv(annotation_file)
Tx <- read_tsv(annotation_file, col_names = c("target_id", "gene_name", "name", "PpyrOR"), col_types = "cccc")
Tx <- as_tibble(Tx)
#transcript ID needs to be the first column in the dataframe
Tx <- dplyr::select(Tx, "target_id", "gene_name")
Multivariate analysis will allow us to explore sources of variation in the data.
We want to explore:
Note: “run” and “machine” are confounded - only need to use one of these variables
machine <- meta$MACHINE
machine <- factor(machine)
run <- meta$RUN
run <- factor(run)
run_lane <- meta$RUN_LANE_COMBO
run_lane <- factor(run_lane)
tissue <- meta$TISSUE
tissue <- factor(tissue)
sex <- meta$SEX
sex <- factor(sex)
collection_date <- meta$DATE_COLLECTED
collection_date <- factor(collection_date)
preservation_date <- meta$DATE_PRESERVED
preservation_date <- factor(preservation_date)
time_preserved <- as.character(meta$TIME_PRESERVED)
time_preserved <- factor(time_preserved)
date_by_time_preserved <- meta$DATE_BY_TIME_PRESERVED
date_by_time_preserved <- factor(date_by_time_preserved)
individual <- meta$INDIVIDUAL
individual <- factor(individual)
spme <- meta$SPME_USE
spme <- factor(spme)
captivity <- meta$CAPTIVE_DURATION_hrs
captivity <- factor(captivity)
Then, perform some multivariate analyses:
#convert vst_expr to a matrix for pca
vst_expr.m <- as.matrix(vst_expr[,1:ncol(vst_expr)-1])
#change sample names to include sex - helps with visualization later
colnames(vst_expr.m) <- c("SEL534ant (M)", "SEL534BL (M)", "SEL535ant (M)", "SEL535BL (M)", "SEL536ant (M)", "SEL536BL (M)", "SEL543ant (M)", "SEL543BL (M)", "SEL562ant (F)", "SEL562BL (F)", "SEL627ant (F)", "SEL627BL (F)", "SEL672ant (F)", "SEL672BL (F)")
#hierarchical clustering can only work on a data matrix, not a data frame
distance <- dist(t(vst_expr.m), method = "maximum") #other distance methods are "euclidean", maximum", "manhattan", "canberra", "binary" or "minkowski"
clusters <- hclust(distance, method = "average") #other agglomeration methods are "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", or "centroid"
dend <- as.dendrogram(clusters) # Assign the dendrogram so we can edit it
#pdf("hierarchical_clustering_dendrogram_SEL_2025_01_15.pdf")
plot(clusters, labels=labels(dend))
#dev.off()
pca.res <- prcomp(t(vst_expr.m), scale.=F, retx=T)
#look at the PCA result (pca.res) that you just created
summary(pca.res) # Prints variance summary for all principal components.
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 36.2970 18.2412 16.35102 13.8932 13.48014 13.11944
## Proportion of Variance 0.4559 0.1152 0.09252 0.0668 0.06288 0.05956
## Cumulative Proportion 0.4559 0.5711 0.66359 0.7304 0.79327 0.85284
## PC7 PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 11.95800 9.74443 7.38734 6.58893 5.72460 5.47876 5.15069
## Proportion of Variance 0.04948 0.03286 0.01889 0.01502 0.01134 0.01039 0.00918
## Cumulative Proportion 0.90232 0.93518 0.95407 0.96909 0.98043 0.99082 1.00000
## PC14
## Standard deviation 1.597e-13
## Proportion of Variance 0.000e+00
## Cumulative Proportion 1.000e+00
#some useful tools for looking at the data
#pca.res$rotation #$rotation shows you how much each gene influenced each PC (called 'scores')
#pca.res$x # 'x' shows you how much each sample influenced each PC (called 'loadings')
#note that these have a magnitude and a direction (this is the basis for making a PCA plot)
#pdf("screeplot_2025_03_12.pdf")
require(graphics)
#plot(pca.res) #This is the same as screeplot()
screeplot(pca.res) # A screeplot is a standard way to view eigenvalues for each PCA
#dev.off()
#get the variance percentages
pc.var <- pca.res$sdev^2 # sdev^2 captures these eigenvalues from the PCA result
pc.per <- round(pc.var/sum(pc.var)*100, 1) # we can then use these eigenvalues to calculate the percentage variance explained by each PC
#get PCA data as a tibble that can graph
pca.res.df <- as_tibble(pca.res$x)
pca.res.df$samplename <- rownames(pca.res$x)
#write_csv(pca.res.df, "PCA_loadings_table_2025_03_12.csv")
#Plot PC1 vs PC2
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
n = 7
cols = gg_color_hue(n)
ggplot(pca.res.df, aes(x=PC1, y=PC2, fill = tissue, color = individual, shape = sex)) +
geom_point(size = 4, stroke = 1.5) +
xlab(paste0("PC1 (",pc.per[1],"%",")")) +
ylab(paste0("PC2 (",pc.per[2],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(21,24), labels = c("female", "male")) +
scale_fill_manual(values = c("gray31", "white"), labels = c("antenna", "leg")) +
guides(
fill = guide_legend(title = "Tissue", override.aes=list(shape = 21)),
shape = guide_legend(title = "Sex"),
color = guide_legend(title = "Individual"))
#ggsave("PCA_plot_ind_tissue_sex_2025_03_12.png", dev="png")
f <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = machine)) +
geom_point(size = 4) +
xlab(paste0("PC1 (",pc.per[1],"%",")")) +
ylab(paste0("PC2 (",pc.per[2],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
h <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = run_lane)) +
geom_point(size = 4) +
xlab(paste0("PC1 (",pc.per[1],"%",")")) +
ylab(paste0("PC2 (",pc.per[2],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
i <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = collection_date)) +
geom_point(size = 4) +
xlab(paste0("PC1 (",pc.per[1],"%",")")) +
ylab(paste0("PC2 (",pc.per[2],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
j <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = preservation_date)) +
geom_point(size = 4) +
xlab(paste0("PC1 (",pc.per[1],"%",")")) +
ylab(paste0("PC2 (",pc.per[2],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
k <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = time_preserved)) +
geom_point(size = 4) +
xlab(paste0("PC1 (",pc.per[1],"%",")")) +
ylab(paste0("PC2 (",pc.per[2],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
l <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = date_by_time_preserved)) +
geom_point(size = 4) +
xlab(paste0("PC1 (",pc.per[1],"%",")")) +
ylab(paste0("PC2 (",pc.per[2],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
m <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = spme)) +
geom_point(size = 4) +
xlab(paste0("PC1 (",pc.per[1],"%",")")) +
ylab(paste0("PC2 (",pc.per[2],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
n <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = captivity)) +
geom_point(size = 4) +
xlab(paste0("PC1 (",pc.per[1],"%",")")) +
ylab(paste0("PC2 (",pc.per[2],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
plot_grid(f, h, i, j, k, l, m, n, labels = c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'), label_size = 12, ncol= 2, align = "hv") # plot_grid() is from the cowplot package
#Plot PC3 vs PC4
e <- ggplot(pca.res.df, aes(x=PC3, y=PC4, fill = tissue, color = individual, shape = sex)) +
geom_point(size = 4, stroke = 1.5) +
xlab(paste0("PC3 (",pc.per[3],"%",")")) +
ylab(paste0("PC4 (",pc.per[4],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(21,24), labels = c("female", "male")) +
scale_fill_manual(values = c("gray31", "white"), labels = c("antenna", "leg")) +
guides(
fill = guide_legend(title = "Tissue", override.aes=list(shape = 21)),
shape = guide_legend(title = "Sex"),
color = guide_legend(title = "Individual"))
e
f <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = machine)) +
geom_point(size = 4) +
xlab(paste0("PC3 (",pc.per[3],"%",")")) +
ylab(paste0("PC4 (",pc.per[4],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
h <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = run_lane)) +
geom_point(size = 4) +
xlab(paste0("PC3 (",pc.per[3],"%",")")) +
ylab(paste0("PC4 (",pc.per[4],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
i <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = collection_date)) +
geom_point(size = 4) +
xlab(paste0("PC3 (",pc.per[3],"%",")")) +
ylab(paste0("PC4 (",pc.per[4],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
j <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = preservation_date)) +
geom_point(size = 4) +
xlab(paste0("PC3 (",pc.per[3],"%",")")) +
ylab(paste0("PC4 (",pc.per[4],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
k <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = time_preserved)) +
geom_point(size = 4) +
xlab(paste0("PC3 (",pc.per[3],"%",")")) +
ylab(paste0("PC4 (",pc.per[4],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
l <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = date_by_time_preserved)) +
geom_point(size = 4) +
xlab(paste0("PC3 (",pc.per[3],"%",")")) +
ylab(paste0("PC4 (",pc.per[4],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
m <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = spme)) +
geom_point(size = 4) +
xlab(paste0("PC3 (",pc.per[3],"%",")")) +
ylab(paste0("PC4 (",pc.per[4],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
n <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = captivity)) +
geom_point(size = 4) +
xlab(paste0("PC3 (",pc.per[3],"%",")")) +
ylab(paste0("PC4 (",pc.per[4],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
plot_grid(f, h, i, j, k, l, m, n, labels = c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'), label_size = 12, ncol= 2, align = "hv") # plot_grid() is from the cowplot package
f <- ggplot(pca.res.df, aes(x=PC5, y=PC6, fill = tissue, color = individual, shape = sex)) +
geom_point(size = 4, stroke = 1.5) +
xlab(paste0("PC5 (",pc.per[5],"%",")")) +
ylab(paste0("PC6 (",pc.per[6],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(21,24), labels = c("female", "male")) +
scale_fill_manual(values = c("gray31", "white"), labels = c("antenna", "leg")) +
guides(
fill = guide_legend(title = "Tissue", override.aes=list(shape = 21)),
shape = guide_legend(title = "Sex"),
color = guide_legend(title = "Individual"))
f
f <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = machine)) +
geom_point(size = 4) +
xlab(paste0("PC5 (",pc.per[5],"%",")")) +
ylab(paste0("PC6 (",pc.per[6],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
h <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = run_lane)) +
geom_point(size = 4) +
xlab(paste0("PC5 (",pc.per[5],"%",")")) +
ylab(paste0("PC6 (",pc.per[6],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
i <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = collection_date)) +
geom_point(size = 4) +
xlab(paste0("PC5 (",pc.per[5],"%",")")) +
ylab(paste0("PC6 (",pc.per[6],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
j <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = preservation_date)) +
geom_point(size = 4) +
xlab(paste0("PC5 (",pc.per[5],"%",")")) +
ylab(paste0("PC6 (",pc.per[6],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
k <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = time_preserved)) +
geom_point(size = 4) +
xlab(paste0("PC5 (",pc.per[5],"%",")")) +
ylab(paste0("PC6 (",pc.per[6],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
l <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = date_by_time_preserved)) +
geom_point(size = 4) +
xlab(paste0("PC5 (",pc.per[5],"%",")")) +
ylab(paste0("PC6 (",pc.per[6],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
m <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = spme)) +
geom_point(size = 4) +
xlab(paste0("PC5 (",pc.per[5],"%",")")) +
ylab(paste0("PC6 (",pc.per[6],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
n <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = captivity)) +
geom_point(size = 4) +
xlab(paste0("PC5 (",pc.per[5],"%",")")) +
ylab(paste0("PC6 (",pc.per[6],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
plot_grid(f, h, i, j, k, l, m, n, labels = c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'), label_size = 12, ncol= 2, align = "hv") # plot_grid() is from the cowplot package
g <- ggplot(pca.res.df, aes(x=PC7, y=PC8, fill = tissue, color = individual, shape = sex)) +
geom_point(size = 4, stroke = 1.5) +
xlab(paste0("PC7 (",pc.per[7],"%",")")) +
ylab(paste0("PC8 (",pc.per[8],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(21,24), labels = c("female", "male")) +
scale_fill_manual(values = c("gray31", "white"), labels = c("antenna", "leg")) +
guides(
fill = guide_legend(title = "Tissue", override.aes=list(shape = 21)),
shape = guide_legend(title = "Sex"),
color = guide_legend(title = "Individual"))
g
f <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = machine)) +
geom_point(size = 4) +
xlab(paste0("PC7 (",pc.per[7],"%",")")) +
ylab(paste0("PC8 (",pc.per[8],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
h <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = run_lane)) +
geom_point(size = 4) +
xlab(paste0("PC7 (",pc.per[7],"%",")")) +
ylab(paste0("PC8 (",pc.per[8],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
i <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = collection_date)) +
geom_point(size = 4) +
xlab(paste0("PC7 (",pc.per[7],"%",")")) +
ylab(paste0("PC8 (",pc.per[8],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
j <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = preservation_date)) +
geom_point(size = 4) +
xlab(paste0("PC7 (",pc.per[7],"%",")")) +
ylab(paste0("PC8 (",pc.per[8],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
k <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = time_preserved)) +
geom_point(size = 4) +
xlab(paste0("PC7 (",pc.per[7],"%",")")) +
ylab(paste0("PC8 (",pc.per[8],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
l <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = date_by_time_preserved)) +
geom_point(size = 4) +
xlab(paste0("PC7 (",pc.per[7],"%",")")) +
ylab(paste0("PC8 (",pc.per[8],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
m <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = spme)) +
geom_point(size = 4) +
xlab(paste0("PC7 (",pc.per[7],"%",")")) +
ylab(paste0("PC8 (",pc.per[8],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
n <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = captivity)) +
geom_point(size = 4) +
xlab(paste0("PC7 (",pc.per[7],"%",")")) +
ylab(paste0("PC8 (",pc.per[8],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(16,17)) +
scale_alpha_manual(values = c(1, 0.6))
plot_grid(f, h, i, j, k, l, m, n, labels = c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'), label_size = 12, ncol= 2, align = "hv") # plot_grid() is from the cowplot package
This is another way to view PCA laodings to understand impact of each sample on each principal component
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
as_tibble() %>%
add_column(sample = sampleLabels,
group = tissue)
pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
cols = PC1:PC14, # column names to be stored as a SINGLE variable
names_to = "PC", # name of that new variable (column)
values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.
ggplot(pca.pivot) +
aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
geom_bar(stat="identity") +
facet_wrap(~PC) +
labs(title="PCA 'small multiples' plot",
caption=paste0("produced on ", Sys.time())) +
theme_bw() +
coord_flip()
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
as_tibble() %>%
add_column(sample = sampleLabels,
group = sex)
pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
cols = PC1:PC14, # column names to be stored as a SINGLE variable
names_to = "PC", # name of that new variable (column)
values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.
ggplot(pca.pivot) +
aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
geom_bar(stat="identity") +
facet_wrap(~PC) +
labs(title="PCA 'small multiples' plot",
caption=paste0("produced on ", Sys.time())) +
theme_bw() +
coord_flip()
#do PCA small multiples for run
#nothing matches for run
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
as_tibble() %>%
add_column(sample = sampleLabels,
group = run)
pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
cols = PC1:PC14, # column names to be stored as a SINGLE variable
names_to = "PC", # name of that new variable (column)
values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.
ggplot(pca.pivot) +
aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
geom_bar(stat="identity") +
facet_wrap(~PC) +
labs(title="PCA 'small multiples' plot",
caption=paste0("produced on ", Sys.time())) +
theme_bw() +
coord_flip()
#not enough data
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
as_tibble() %>%
add_column(sample = sampleLabels,
group = run_lane)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.
pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
cols = PC1:PC14, # column names to be stored as a SINGLE variable
names_to = "PC", # name of that new variable (column)
values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.
ggplot(pca.pivot) +
aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
geom_bar(stat="identity") +
facet_wrap(~PC) +
labs(title="PCA 'small multiples' plot",
caption=paste0("produced on ", Sys.time())) +
theme_bw() +
coord_flip()
#maybe pC4
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
as_tibble() %>%
add_column(sample = sampleLabels,
group = collection_date)
pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
cols = PC1:PC14, # column names to be stored as a SINGLE variable
names_to = "PC", # name of that new variable (column)
values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.
ggplot(pca.pivot) +
aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
geom_bar(stat="identity") +
facet_wrap(~PC) +
labs(title="PCA 'small multiples' plot",
caption=paste0("produced on ", Sys.time())) +
theme_bw() +
coord_flip()
#nothing really
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
as_tibble() %>%
add_column(sample = sampleLabels,
group = preservation_date)
pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
cols = PC1:PC14, # column names to be stored as a SINGLE variable
names_to = "PC", # name of that new variable (column)
values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.
ggplot(pca.pivot) +
aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
geom_bar(stat="identity") +
facet_wrap(~PC) +
labs(title="PCA 'small multiples' plot",
caption=paste0("produced on ", Sys.time())) +
theme_bw() +
coord_flip()
#not really enough samples to tell
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
as_tibble() %>%
add_column(sample = sampleLabels,
group = time_preserved)
pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
cols = PC1:PC14, # column names to be stored as a SINGLE variable
names_to = "PC", # name of that new variable (column)
values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.
ggplot(pca.pivot) +
aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
geom_bar(stat="identity") +
facet_wrap(~PC) +
labs(title="PCA 'small multiples' plot",
caption=paste0("produced on ", Sys.time())) +
theme_bw() +
coord_flip()
#PC2 - seems like all females preserved in the same group!
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
as_tibble() %>%
add_column(sample = sampleLabels,
group = date_by_time_preserved)
pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
cols = PC1:PC14, # column names to be stored as a SINGLE variable
names_to = "PC", # name of that new variable (column)
values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.
ggplot(pca.pivot) +
aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
geom_bar(stat="identity") +
facet_wrap(~PC) +
labs(title="PCA 'small multiples' plot",
caption=paste0("produced on ", Sys.time())) +
theme_bw() +
coord_flip()
#PC3 separates individuals
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
as_tibble() %>%
add_column(sample = sampleLabels,
group = individual)
pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
cols = PC1:PC14, # column names to be stored as a SINGLE variable
names_to = "PC", # name of that new variable (column)
values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.
ggplot(pca.pivot) +
aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
geom_bar(stat="identity") +
facet_wrap(~PC) +
labs(title="PCA 'small multiples' plot",
caption=paste0("produced on ", Sys.time())) +
theme_bw() +
coord_flip()
#there was only 1
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
as_tibble() %>%
add_column(sample = sampleLabels,
group = spme)
pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
cols = PC1:PC14, # column names to be stored as a SINGLE variable
names_to = "PC", # name of that new variable (column)
values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.
ggplot(pca.pivot) +
aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
geom_bar(stat="identity") +
facet_wrap(~PC) +
labs(title="PCA 'small multiples' plot",
caption=paste0("produced on ", Sys.time())) +
theme_bw() +
coord_flip()
#hard to say
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
as_tibble() %>%
add_column(sample = sampleLabels,
group = captivity)
pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
cols = PC1:PC14, # column names to be stored as a SINGLE variable
names_to = "PC", # name of that new variable (column)
values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.
ggplot(pca.pivot) +
aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
geom_bar(stat="identity") +
facet_wrap(~PC) +
labs(title="PCA 'small multiples' plot",
caption=paste0("produced on ", Sys.time())) +
theme_bw() +
coord_flip()
ggplot(pca.res.df) +
aes(x=PC1, y=PC2, label=sampleLabels, shape = tissue, group=tissue) +
geom_point(size=4, alpha=0.5) +
#geom_label() +
stat_ellipse() +
xlab(paste0("PC1 (",pc.per[1],"%",")")) +
ylab(paste0("PC2 (",pc.per[2],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw()
ggplot(pca.res.df) +
aes(x=PC1, y=PC2, label=sampleLabels, color = sex, group=sex) +
geom_point(size=4, alpha=0.5) +
#geom_label() +
stat_ellipse() +
xlab(paste0("PC1 (",pc.per[1],"%",")")) +
ylab(paste0("PC2 (",pc.per[2],"%",")")) +
labs(title="PCA plot",
caption=paste0("produced on ", Sys.time())) +
coord_fixed() +
theme_bw()
#plot PCs
p <- ggplot(pca.res.df, (aes(x=PC1, y=PC2))) +
geom_point(aes(fill=tissue, shape=sex), size=3, alpha = 0.6) +
xlab(paste0("PC1 (",pc.per[1],"%",")")) +
ylab(paste0("PC2 (",pc.per[2],"%",")")) +
coord_fixed() +
theme_bw() +
scale_shape_manual(values = c(21,24), labels = c("female", "male")) +
scale_fill_manual(values=c("black", "white"), labels = c("antenna", "leg")) +
guides(
fill = guide_legend(title = "Tissue", override.aes=list(shape = 21)),
shape = guide_legend(title = "Sex"))
p
#ggsave("PC1_v_2_tissue_and_sex_2025_03_12.pdf", plot = p, device = "pdf", width = 85, height = 60, dpi = 300, units = "mm")
#ggsave("PC1_v_2_tissue_and_sex_2025_03_12.png", device = "png", width = 85, height = 60, dpi = 300, units = "mm")
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.7.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] cowplot_1.1.3 patchwork_1.3.0.9000
## [3] data.table_1.16.4 ggpattern_1.1.1
## [5] dendextend_1.19.0 rgl_1.3.14
## [7] stargazer_5.2.3 gt_0.11.1
## [9] plotly_4.10.4 DT_0.33
## [11] edgeR_4.2.2 limma_3.60.6
## [13] tximeta_1.22.1 DESeq2_1.44.0
## [15] SummarizedExperiment_1.34.0 Biobase_2.64.0
## [17] MatrixGenerics_1.16.0 matrixStats_1.4.1
## [19] GenomicRanges_1.56.2 GenomeInfoDb_1.40.1
## [21] IRanges_2.38.1 S4Vectors_0.42.1
## [23] BiocGenerics_0.50.0 datapasta_3.1.0
## [25] lubridate_1.9.4 forcats_1.0.0
## [27] stringr_1.5.1 dplyr_1.1.4
## [29] purrr_1.0.2 readr_2.1.5
## [31] tidyr_1.3.1 tibble_3.2.1
## [33] ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.17.1 jsonlite_1.8.9 tximport_1.32.0
## [4] magrittr_2.0.3 GenomicFeatures_1.56.0 farver_2.1.2
## [7] rmarkdown_2.29 BiocIO_1.14.0 zlibbioc_1.50.0
## [10] vctrs_0.6.5 memoise_2.0.1 Rsamtools_2.20.0
## [13] RCurl_1.98-1.16 base64enc_0.1-3 htmltools_0.5.8.1
## [16] S4Arrays_1.4.1 progress_1.2.3 AnnotationHub_3.12.0
## [19] curl_6.0.1 SparseArray_1.4.8 sass_0.4.9
## [22] bslib_0.8.0 htmlwidgets_1.6.4 httr2_1.0.7
## [25] cachem_1.1.0 GenomicAlignments_1.40.0 lifecycle_1.0.4
## [28] pkgconfig_2.0.3 Matrix_1.7-0 R6_2.5.1
## [31] fastmap_1.2.0 GenomeInfoDbData_1.2.12 digest_0.6.37
## [34] colorspace_2.1-1 AnnotationDbi_1.66.0 RSQLite_2.3.9
## [37] labeling_0.4.3 filelock_1.0.3 timechange_0.3.0
## [40] httr_1.4.7 abind_1.4-8 compiler_4.4.1
## [43] bit64_4.5.2 withr_3.0.2 BiocParallel_1.38.0
## [46] viridis_0.6.5 DBI_1.2.3 biomaRt_2.60.1
## [49] MASS_7.3-60.2 rappdirs_0.3.3 DelayedArray_0.30.1
## [52] rjson_0.2.23 tools_4.4.1 glue_1.8.0
## [55] restfulr_0.0.15 grid_4.4.1 generics_0.1.3
## [58] gtable_0.3.5 tzdb_0.4.0 ensembldb_2.28.1
## [61] hms_1.1.3 xml2_1.3.6 XVector_0.44.0
## [64] BiocVersion_3.19.1 pillar_1.10.1 vroom_1.6.5
## [67] BiocFileCache_2.12.0 lattice_0.22-6 rtracklayer_1.64.0
## [70] bit_4.5.0.1 tidyselect_1.2.1 locfit_1.5-9.10
## [73] Biostrings_2.72.1 knitr_1.49 gridExtra_2.3
## [76] ProtGenerics_1.36.0 xfun_0.50 statmod_1.5.0
## [79] stringi_1.8.4 UCSC.utils_1.0.0 lazyeval_0.2.2
## [82] yaml_2.3.10 evaluate_1.0.3 codetools_0.2-20
## [85] BiocManager_1.30.25 cli_3.6.3 munsell_0.5.1
## [88] jquerylib_0.1.4 Rcpp_1.0.14 dbplyr_2.5.0
## [91] png_0.1-8 XML_3.99-0.17 parallel_4.4.1
## [94] blob_1.2.4 prettyunits_1.2.0 AnnotationFilter_1.28.0
## [97] bitops_1.0-9 txdbmaker_1.0.1 viridisLite_0.4.2
## [100] scales_1.3.0 crayon_1.5.3 rlang_1.1.4
## [103] KEGGREST_1.44.1